#!/bin/bash

# Experiment configuration
#if [ "$#" -ne 1 ]; then
#    echo "Usage: bash run.sh METHOD"
#    exit
#fi
# --- Sweep axes -------------------------------------------------------------
# Methods handed to train_dist.py through --method, one full sweep per entry.
METHOD='pf2lora hetlora'

# Datasets handed to train_dist.py through --dataset (whitespace separated).
tasks='cola sst2 qnli qqp mnli'
#tasks="cola"

# Per-task settings. Entry k belongs to the k-th word of $tasks, so both
# arrays have to stay in the same order as the task list.
het=(0.3 0.9 0.9 0.9 0.9)        # value for --heterogeneity
com_rds=(50 100 100 300 300)     # value for --com_rounds

# Learning rates swept for --lr_B (whitespace separated).
learning_rates=5e-4 # [lora, hetlora, maml ]
#learning_rates="5e-4 1e-3 2e-3" #[ours]


# --- Launch settings --------------------------------------------------------
epochs=4     # value for --num_epochs
NUM_GPUS=8   # value for --num_gpu
WORKERS=8    # processes started per run; also the value for --world_size
#######################################
# Print a pseudo-random integer in the inclusive range [$1, $2].
# Arguments:
#   $1 - lower bound (integer)
#   $2 - upper bound (integer, must be >= $1)
# Outputs:
#   The chosen integer on stdout; an error message on stderr for a bad range.
# Returns:
#   0 on success, 1 if the upper bound is below the lower bound.
#######################################
rand() {
    # Everything is local so a direct call (outside $(...)) cannot clobber
    # caller variables that happen to be named min/max/num.
    local lo=$1
    local hi=$2
    local span=$(( hi - lo + 1 ))

    if (( span <= 0 )); then
        printf 'rand: invalid range %s..%s\n' "$lo" "$hi" >&2
        return 1
    fi

    # $RANDOM yields only 15 bits (0..32767). Two draws are glued together
    # into a 30-bit value so ranges wider than 32768 values stay reachable.
    # The modulo keeps a slight bias when span does not divide 2^30.
    local draw=$(( (RANDOM << 15) | RANDOM ))
    echo $(( draw % span + lo ))
}
# One port in 49152..65535 is drawn once; every launch below receives this
# same value through --port.
port_num=$(rand 49152 65535)

# het and com_rds are looked up by each task's position in $tasks. If either
# array is shorter than the task list, --heterogeneity / --com_rounds would be
# passed without a value, so stop before anything is launched.
# shellcheck disable=SC2206  # splitting $tasks into words is intentional
task_list=($tasks)
if (( ${#task_list[@]} > ${#het[@]} || ${#task_list[@]} > ${#com_rds[@]} )); then
  printf 'error: %d tasks configured but het has %d and com_rds has %d entries\n' \
    "${#task_list[@]}" "${#het[@]}" "${#com_rds[@]}" >&2
  exit 1
fi

failed_runs=0   # number of (method, task, lr) runs with at least one failed worker

# Sweep: method -> task -> learning rate. Each combination starts $WORKERS
# background processes and waits for all of them before moving on.
for m in $METHOD; do
  j=0   # position of the current task; index into het and com_rds
  for task in $tasks; do
    for lr in $learning_rates; do
      # NOTE(review): exp_name is assigned but not passed to train_dist.py in
      # this section; kept in case something outside this view reads it.
      exp_name="roberta_lora_${task}_lr${lr}"

      # Start from a clean list so nothing inherited from the environment or
      # from a previous run ends up in it.
      pids=""

      # One process per worker; the worker index is used for both --gpu and
      # --rank.
      for (( i = 0; i < WORKERS; i++ )); do
        python train_dist.py \
          --name_or_path roberta-base \
          --world_size "$WORKERS" \
          --dataset "$task" \
          --seed 0 \
          --method "$m" \
          --lr_A 1e-2 \
          --lr_B "$lr" \
          --maml_in_lr 5e-2 \
          --rank_max 12 \
          --rank_mat 8 \
          --rank_min 5 \
          --fp16 \
          --beta 0.0 \
          --gamma 0.99 \
          --lamb 0.0005 \
          --num_epochs "$epochs" \
          --port "$port_num" \
          --batch_size 16 \
          --eval_batch_size 8 \
          --com_interval 10 \
          --heterogeneity "${het[$j]}" \
          --num_gpu "$NUM_GPUS" \
          --com_rounds "${com_rds[$j]}" \
          --gpu "$i" \
          --rank "$i" \
          --inner_loops 1 \
          --hessian_q 3 \
          --z_loops 5 &
        pids="${pids} $!"
      done

      echo "Running task $task with lr $lr and method $m on children pids: $pids"

      # Wait on every worker individually so a non-zero exit status is seen
      # instead of being discarded by a bare `wait`.
      failed_workers=0
      # shellcheck disable=SC2086  # $pids is a space-separated list on purpose
      for pid in $pids; do
        if ! wait "$pid"; then
          failed_workers=$((failed_workers + 1))
        fi
      done
      # Keep the original barrier: also reap any other background job.
      wait

      if (( failed_workers > 0 )); then
        printf 'warning: %d of %d workers failed for task %s with lr %s and method %s\n' \
          "$failed_workers" "$WORKERS" "$task" "$lr" "$m" >&2
        failed_runs=$((failed_runs + 1))
      fi
      pids=""
    done
    j=$((j + 1))
  done
done

# The sweep stays best-effort (later runs still execute after a failure), but
# the failures are summarised so they cannot go unnoticed.
if (( failed_runs > 0 )); then
  printf 'warning: %d run(s) had at least one failed worker\n' "$failed_runs" >&2
fi
